/**
* This software is licensed to you under the Apache License, Version 2.0 (the
* "Apache License").
*
* LinkedIn's contributions are made under the Apache License. If you contribute
* to the Software, the contributions will be deemed to have been made under the
* Apache License, unless you expressly indicate otherwise. Please do not make any
* contributions that would be inconsistent with the Apache License.
*
* You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, this software
* distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
* License for the specific language governing permissions and limitations for the
* software governed under the Apache License.
*
* © 2012 LinkedIn Corp. All Rights Reserved.
*/
package com.senseidb.conf;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.Format;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.springframework.util.Assert;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import com.senseidb.indexing.DefaultSenseiInterpreter;
import com.senseidb.indexing.DefaultSenseiInterpreter.IndexSpec;
import com.senseidb.indexing.MetaType;
public class SenseiSchema {
public static final String SRC_DATA_FIELD_NAME = "__SRC_DATA__";
public static final String SRC_DATA_COMPRESSED_FIELD_NAME = "stored";
public static final String EVENT_TYPE_FIELD = "type";
public static final String EVENT_FIELD = "data";
public static final String EVENT_TYPE_ADD = "add";
public static final String EVENT_TYPE_UPDATE = "update";
public static final String EVENT_TYPE_DELETE = "delete";
public static final String EVENT_TYPE_SKIP = "skip";
private static Logger logger = Logger.getLogger(SenseiSchema.class);
private String _uidField;
private String _deleteField;
private String _skipField;
private String _srcDataStore;
private String _srcDataField;
private boolean _compressSrcData;
private List<FacetDefinition> facets = new ArrayList<FacetDefinition>();
public static class FieldDefinition {
public Format formatter;
public boolean isMeta;
public IndexSpec textIndexSpec;
public String fromField;
public boolean isMulti;
public boolean isActivity;
public String delim = ",";
public Class type = null;
public String name;
// indicates if the field name has any wildcards in it.
public boolean hasWildCards;
// compiled pattern if the field name has wildcards
public Pattern wildCardPattern;
}
public static class FacetDefinition {
public String name;
public String type;
public String column;
public Boolean dynamic;
public Boolean wildcard;
public Map<String, List<String>> params;
public Set<String> dependSet = new HashSet<String>();
public static FacetDefinition valueOf(JSONObject facet) {
try {
FacetDefinition ret = new FacetDefinition();
ret.name = facet.getString("name");
ret.type = facet.getString("type");
ret.column = facet.optString("column", ret.name);
JSONArray depends = facet.optJSONArray("depends");
if (depends != null) {
for (int i = 0; i < depends.length(); ++i) {
String dep = depends.getString(i).trim();
if (!dep.isEmpty()) {
ret.dependSet.add(dep);
}
}
}
JSONArray paramList = facet.optJSONArray("params");
ret.params = SenseiFacetHandlerBuilder.parseParams(paramList);
ret.wildcard = facet.optBoolean("wildcard", false);
return ret;
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
}
private SenseiSchema() {
}
public String getUidField() {
return _uidField;
}
public String getDeleteField() {
return _deleteField;
}
public String getSkipField() {
return _skipField;
}
public String getSrcDataField() {
return _srcDataField;
}
public String getSrcDataStore() {
return _srcDataStore;
}
public boolean isCompressSrcData() {
return _compressSrcData;
}
public void setCompressSrcData(boolean _compressSrcData) {
this._compressSrcData = _compressSrcData;
}
public Map<String, FieldDefinition> getFieldDefMap() {
return _fieldDefMap;
}
private Map<String, FieldDefinition> _fieldDefMap;
private static JSONObject schemaObj;
public static SenseiSchema build(JSONObject schemaObj) throws JSONException, ConfigurationException {
SenseiSchema schema = new SenseiSchema();
schema.setSchemaObj(schemaObj);
schema._fieldDefMap = new HashMap<String, FieldDefinition>();
JSONObject tableElem = schemaObj.optJSONObject("table");
if (tableElem == null) {
throw new ConfigurationException("empty schema");
}
schema._uidField = tableElem.getString("uid");
schema._deleteField = tableElem.optString("delete-field", "");
schema._skipField = tableElem.optString("skip-field", "");
schema._srcDataStore = tableElem.optString("src-data-store", "");
schema._srcDataField = tableElem.optString("src-data-field", "src_data");
schema._compressSrcData = tableElem.optBoolean("compress-src-data", true);
JSONArray columns = tableElem.optJSONArray("columns");
int count = 0;
if (columns != null) {
count = columns.length();
}
for (int i = 0; i < count; ++i) {
JSONObject column = columns.getJSONObject(i);
try {
String n = column.getString("name");
String t = column.getString("type");
String frm = column.optString("from");
FieldDefinition fdef = new FieldDefinition();
fdef.formatter = null;
fdef.fromField = frm.length() > 0 ? frm : n;
fdef.isMeta = true;
fdef.isMulti = column.optBoolean("multi");
fdef.isActivity = column.optBoolean("activity");
fdef.name = n;
String delimString = column.optString("delimiter");
if (delimString != null && delimString.trim().length() > 0) {
fdef.delim = delimString;
}
fdef.hasWildCards = column.optBoolean("wildcard");
if (fdef.hasWildCards) {
Assert.isTrue(fdef.fromField.equals(fdef.name), "Cannot have a different \"from\" field with wildcards");
fdef.wildCardPattern = Pattern.compile(fdef.name);
}
schema._fieldDefMap.put(n, fdef);
if (t.equals("int")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(int.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
fdef.type = int.class;
} else if (t.equals("short")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(short.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
fdef.type = int.class;
} else if (t.equals("long")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(long.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
fdef.type = long.class;
} else if (t.equals("float")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(float.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
fdef.type = double.class;
} else if (t.equals("double")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(double.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
fdef.type = double.class;
} else if (t.equals("char")) {
fdef.formatter = null;
} else if (t.equals("string")) {
fdef.formatter = null;
} else if (t.equals("boolean")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(boolean.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.type = boolean.class;
} else if (t.equals("date")) {
String f = "";
try {
f = column.optString("format");
} catch (Exception ex) {
logger.error(ex.getMessage(), ex);
}
if (f.isEmpty())
throw new ConfigurationException("Date format cannot be empty.");
fdef.formatter = new SimpleDateFormat(f);
fdef.type = Date.class;
} else if (t.equals("text")) {
fdef.isMeta = false;
String idxString = column.optString("index", null);
String storeString = column.optString("store", null);
String tvString = column.optString("termvector", null);
Index idx = idxString == null ? Index.ANALYZED : DefaultSenseiInterpreter.INDEX_VAL_MAP.get(idxString.toUpperCase());
Store store = storeString == null ? Store.NO : DefaultSenseiInterpreter.STORE_VAL_MAP.get(storeString.toUpperCase());
TermVector tv = tvString == null ? TermVector.NO : DefaultSenseiInterpreter.TV_VAL_MAP.get(tvString.toUpperCase());
if (idx == null || store == null || tv == null) {
throw new ConfigurationException("Invalid indexing parameter specification");
}
IndexSpec indexingSpec = new IndexSpec();
indexingSpec.store = store;
indexingSpec.index = idx;
indexingSpec.tv = tv;
fdef.textIndexSpec = indexingSpec;
}
} catch (Exception e) {
throw new ConfigurationException("Error parsing schema: "
+ column, e);
}
}
JSONArray facetsList = schemaObj.optJSONArray("facets");
if (facetsList != null) {
for (int i = 0; i < facetsList.length(); i++) {
JSONObject facet = facetsList.optJSONObject(i);
if (facet != null) {
schema.facets.add(FacetDefinition.valueOf(facet));
}
}
}
return schema;
}
@Deprecated
public static SenseiSchema build(Document schemaDoc) throws ConfigurationException {
SenseiSchema schema = new SenseiSchema();
schema._fieldDefMap = new HashMap<String, FieldDefinition>();
NodeList tables = schemaDoc.getElementsByTagName("table");
if (tables == null || tables.getLength() == 0) {
throw new ConfigurationException("empty schema");
}
if (tables.getLength() > 1) {
throw new ConfigurationException("multiple schemas not supported");
}
Element tableElem = (Element) tables.item(0);
schema._uidField = tableElem.getAttribute("uid");
schema._deleteField = tableElem.getAttribute("delete-field");
if (schema._deleteField == null) schema._deleteField = "";
schema._skipField = tableElem.getAttribute("skip-field");
if (schema._skipField == null) schema._skipField = "";
schema._srcDataStore = tableElem.getAttribute("src-data-store");
if (schema._srcDataStore == null) schema._srcDataStore = "";
schema._srcDataField = tableElem.getAttribute("src-data-field");
if (schema._srcDataField == null || schema._srcDataField.length() == 0) schema._srcDataField = "src_data";
schema._compressSrcData = true;
String compress = tableElem.getAttribute("compress-src-data");
if (compress != null && "false".equals(compress))
schema._compressSrcData = false;
NodeList columns = tableElem.getElementsByTagName("column");
for (int i = 0; i < columns.getLength(); ++i) {
try {
Element column = (Element) columns.item(i);
String n = column.getAttribute("name");
String t = column.getAttribute("type");
String frm = column.getAttribute("from");
FieldDefinition fdef = new FieldDefinition();
fdef.formatter = null;
fdef.fromField = frm.length() > 0 ? frm : n;
fdef.isMeta = true;
fdef.isMulti = false;
String isMultiString = column.getAttribute("multi");
if (isMultiString != null && isMultiString.trim().length() > 0) {
fdef.isMulti = Boolean.parseBoolean(isMultiString);
}
String isActivityString = column.getAttribute("activity");
if (isActivityString != null && isActivityString.trim().length() > 0) {
fdef.isActivity = Boolean.parseBoolean(isActivityString);
}
String delimString = column.getAttribute("delimiter");
if (delimString != null && delimString.trim().length() > 0) {
fdef.delim = delimString;
}
fdef.hasWildCards = Boolean.parseBoolean(column.getAttribute("wildcard"));
if (fdef.hasWildCards) {
Assert.isTrue(fdef.fromField.equals(fdef.name), "Cannot have a different \"from\" field with wildcards");
fdef.wildCardPattern = Pattern.compile(fdef.name);
}
schema._fieldDefMap.put(n, fdef);
if (t.equals("int")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(int.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
fdef.type = int.class;
} else if (t.equals("short")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(short.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
fdef.type = int.class;
} else if (t.equals("long")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(long.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
fdef.type = long.class;
} else if (t.equals("float")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(float.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
fdef.type = double.class;
} else if (t.equals("double")) {
MetaType metaType = DefaultSenseiInterpreter.CLASS_METATYPE_MAP.get(double.class);
String formatString = DefaultSenseiInterpreter.DEFAULT_FORMAT_STRING_MAP.get(metaType);
fdef.formatter = new DecimalFormat(formatString, new DecimalFormatSymbols(Locale.US));
fdef.type = double.class;
} else if (t.equals("char")) {
fdef.formatter = null;
} else if (t.equals("string")) {
fdef.formatter = null;
} else if (t.equals("boolean")) {
fdef.formatter = null;
} else if (t.equals("date")) {
String f = "";
try {
f = column.getAttribute("format");
} catch (Exception ex) {
logger.error(ex.getMessage(), ex);
}
if (f.isEmpty())
throw new ConfigurationException("Date format cannot be empty.");
fdef.formatter = new SimpleDateFormat(f);
fdef.type = Date.class;
} else if (t.equals("text")) {
fdef.isMeta = false;
String idxString = column.getAttribute("index");
String storeString = column.getAttribute("store");
String tvString = column.getAttribute("termvector");
Index idx = idxString == null ? Index.ANALYZED : DefaultSenseiInterpreter.INDEX_VAL_MAP.get(idxString.toUpperCase());
Store store = storeString == null ? Store.NO : DefaultSenseiInterpreter.STORE_VAL_MAP.get(storeString.toUpperCase());
TermVector tv = tvString == null ? TermVector.NO : DefaultSenseiInterpreter.TV_VAL_MAP.get(tvString.toUpperCase());
if (idx == null || store == null || tv == null) {
throw new ConfigurationException("Invalid indexing parameter specification");
}
IndexSpec indexingSpec = new IndexSpec();
indexingSpec.store = store;
indexingSpec.index = idx;
indexingSpec.tv = tv;
fdef.textIndexSpec = indexingSpec;
}
} catch (Exception e) {
throw new ConfigurationException("Error parsing schema: "
+ columns.item(i), e);
}
}
JSONArray facetsList = schemaObj.optJSONArray("facets");
if (facetsList != null) {
for (int i = 0; i < facetsList.length(); i++) {
JSONObject facet = facetsList.optJSONObject(i);
if (facet != null) {
schema.facets.add(FacetDefinition.valueOf(facet));
}
}
}
return schema;
}
public List<FacetDefinition> getFacets() {
return facets;
}
public JSONObject getSchemaObj() {
return schemaObj;
}
public void setSchemaObj(JSONObject schemaObj) {
SenseiSchema.schemaObj = schemaObj;
}
}